Figures in supplementary material

This document will allow you to reproduce all supplementary figures.

Set working directory and load all necessary libraries and functions.

# If not installed, install and load the package "here", else: only load the package.
err <- try(library("here", character.only = TRUE), silent = TRUE)
## here() starts at /Users/lgoeminn/Documents/phD/MSqRobHurdlePaper
if (class(err) == 'try-error') {
  install.packages("here", repos = "https://cloud.r-project.org")
  library("here", character.only = TRUE)
}

wd <- here()

# Optional: change the working directory
setwd(wd)

# Load all functions and libraries
source(paste0(wd, "/R/functions.r"))
## Loading required namespace: BiocManager
## 
## Attaching package: 'devtools'
## The following objects are masked from 'package:remotes':
## 
##     dev_package_deps, github_pull, github_release, install_bioc,
##     install_bitbucket, install_cran, install_deps, install_git,
##     install_github, install_local, install_svn, install_url,
##     install_version, package_deps, update_packages
## ── Attaching packages ───────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.5
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind,
##     colMeans, colnames, colSums, dirname, do.call, duplicated,
##     eval, evalq, Filter, Find, get, grep, grepl, intersect,
##     is.unsorted, lapply, lengths, Map, mapply, match, mget, order,
##     paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind,
##     Reduce, rowMeans, rownames, rowSums, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which, which.max,
##     which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:dplyr':
## 
##     exprs
## Loading required package: mzR
## Loading required package: Rcpp
## Warning in fun(libname, pkgname): mzR has been built against a different Rcpp version (0.12.16)
## than is installed on your system (0.12.17). This might lead to errors
## when loading mzR. If you encounter such issues, please send a report,
## including the output of sessionInfo() to the Bioc support forum at 
## https://support.bioconductor.org/. For details see also
## https://github.com/sneumann/mzR/wiki/mzR-Rcpp-compiler-linker-issue.
## Loading required package: BiocParallel
## Loading required package: ProtGenerics
## 
## This is MSnbase version 2.6.1 
##   Visit https://lgatto.github.io/MSnbase/ to get started.
## 
## Attaching package: 'MSnbase'
## The following object is masked from 'package:stats':
## 
##     smooth
## The following object is masked from 'package:base':
## 
##     trimws
## 
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
## 
##     plotMA
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: future
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:future':
## 
##     values
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomeInfoDb
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply
## 
## Attaching package: 'stageR'
## The following object is masked from 'package:methods':
## 
##     getMethod
source(paste0(wd,"/R/plot_functions.R"))

Give session info for reproducibility.

sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS  10.14.1
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] nl_BE.UTF-8/nl_BE.UTF-8/nl_BE.UTF-8/C/nl_BE.UTF-8/nl_BE.UTF-8
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] MSqRob_0.7.5                stageR_1.3.29              
##  [3] SummarizedExperiment_1.10.1 DelayedArray_0.6.0         
##  [5] matrixStats_0.53.1          GenomicRanges_1.32.3       
##  [7] GenomeInfoDb_1.16.0         IRanges_2.14.10            
##  [9] S4Vectors_0.18.3            furrr_0.1.0.9002           
## [11] future_1.10.0               readxl_1.1.0               
## [13] colorspace_1.3-2            zoo_1.8-2                  
## [15] lme4_1.1-17                 Matrix_1.2-14              
## [17] limma_3.36.1                MSnbase_2.6.1              
## [19] ProtGenerics_1.12.0         BiocParallel_1.14.1        
## [21] mzR_2.14.0                  Rcpp_0.12.17               
## [23] Biobase_2.40.0              BiocGenerics_0.26.0        
## [25] forcats_0.3.0               stringr_1.3.1              
## [27] dplyr_0.7.5                 purrr_0.2.5                
## [29] readr_1.1.1                 tidyr_0.8.1                
## [31] tibble_1.4.2                ggplot2_3.0.0              
## [33] tidyverse_1.2.1             MSstats_3.12.2             
## [35] devtools_1.13.5             remotes_2.0.2              
## [37] here_0.1                   
## 
## loaded via a namespace (and not attached):
##  [1] minqa_1.2.4            rprojroot_1.3-2        XVector_0.20.0        
##  [4] rstudioapi_0.7         listenv_0.7.0          affyio_1.50.0         
##  [7] ggrepel_0.8.0          lubridate_1.7.4        xml2_1.2.0            
## [10] codetools_0.2-15       splines_3.5.0          mnormt_1.5-5          
## [13] doParallel_1.0.11      impute_1.54.0          knitr_1.20            
## [16] jsonlite_1.5           nloptr_1.0.4           broom_0.4.4           
## [19] vsn_3.48.1             BiocManager_1.30.4     compiler_3.5.0        
## [22] httr_1.3.1             backports_1.1.2        assertthat_0.2.0      
## [25] lazyeval_0.2.1         cli_1.0.0              htmltools_0.3.6       
## [28] tools_3.5.0            bindrcpp_0.2.2         GenomeInfoDbData_1.1.0
## [31] gtable_0.2.0           glue_1.2.0             affy_1.58.0           
## [34] reshape2_1.4.3         MALDIquant_1.17        cellranger_1.1.0      
## [37] gdata_2.18.0           preprocessCore_1.42.0  nlme_3.1-137          
## [40] iterators_1.0.9        psych_1.8.4            globals_0.12.4        
## [43] rvest_0.3.2            gtools_3.5.0           XML_3.98-1.11         
## [46] MASS_7.3-50            zlibbioc_1.26.0        scales_0.5.0          
## [49] BiocInstaller_1.30.0   pcaMethods_1.72.0      hms_0.4.2             
## [52] doSNOW_1.0.16          yaml_2.1.19            memoise_1.1.0         
## [55] stringi_1.2.3          foreach_1.4.4          randomForest_4.6-14   
## [58] caTools_1.17.1         rlang_0.3.0.1          pkgconfig_2.0.1       
## [61] bitops_1.0-6           mzID_1.18.0            evaluate_0.10.1       
## [64] lattice_0.20-35        bindr_0.1.1            tidyselect_0.2.4      
## [67] plyr_1.8.4             magrittr_1.5           R6_2.2.2              
## [70] snow_0.4-2             gplots_3.0.1           pillar_1.2.3          
## [73] haven_1.1.1            foreign_0.8-70         withr_2.1.2           
## [76] RCurl_1.95-4.10        survival_2.42-3        modelr_0.1.2          
## [79] crayon_1.3.4           KernSmooth_2.23-15     rmarkdown_1.10        
## [82] grid_3.5.0             minpack.lm_1.2-1       data.table_1.11.4     
## [85] marray_1.58.0          digest_0.6.18          munsell_0.5.0

Important: make sure you have run the CPTAC and HEART analyses, or load the necessary files as follows:

load(paste0(wd, "/save_files_CPTAC/peptides_CPTAC2.RData"))
load(paste0(wd, "/save_files_CPTAC/peptides_CPTAC_KNN1.RData"))
load(paste0(wd, "/save_files_CPTAC/peptides_CPTAC_Perseus_imp.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_co_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_Hurdle.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_PI_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_KNN1_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_MSstats_full.RData"))
load(paste0(wd, "/save_files_CPTAC/CPTAC_Perseus_full.RData"))
load(paste0(wd, "/save_files_CPTAC/CPTAC_Perseus_imp_full.RData"))
load(paste0(wd, "/save_files_CPTAC/res_CPTAC_ProPCA_full.RData"))
load(paste0(wd, "/save_files_CPTAC/testResultOneComparison.RData"))
load(paste0(wd, "/save_files_PXD006675/peptides_HEART2.RData"))
load(paste0(wd, "/save_files_PXD006675/res_HEART_Hurdle.RData"))
load(paste0(wd, "/save_files_PXD006675/overlap_PHM.RData"))

1. Evaluate the imputation: protein level (Supplementary Fig. 1 and 2)

We here show the sample-wise effect of kNN and Perseus imputation at the level of MaxQuant’s LFQ-values (protein level). Blue are the original values, red are the imputed values.

# Import non-imputed data
file.proteinGroups <- paste0(wd,"/datasets/CPTAC/proteinGroups_preprocessed.txt")
exprs_col <- grepEcols(file.proteinGroups, "LFQ.intensity.", split = "\t")
proteinGroups.CPTAC <- readMSnSet2(file.proteinGroups, ecol = exprs_col, fnames = c("Protein.IDs"), sep = "\t")
sampleNames(proteinGroups.CPTAC) <- str_replace(sampleNames(proteinGroups.CPTAC), "LFQ.intensity.", "") %>% make.names

# Import Perseus-imputed data
file.proteinGroups <- paste0(wd,"/datasets/CPTAC/proteinGroups_preprocessed_imputed.txt")
exprs_col <- grepEcols(file.proteinGroups, "LFQ.intensity.", split = "\t")
proteinGroups.CPTAC.Perseus.imp <- readMSnSet2(file.proteinGroups, ecol = exprs_col, fnames = c("Protein.IDs"), sep = "\t")
sampleNames(proteinGroups.CPTAC.Perseus.imp) <- str_replace(sampleNames(proteinGroups.CPTAC.Perseus.imp), "LFQ.intensity.", "") %>% make.names

sample.names <- paste0(sampleNames(proteinGroups.CPTAC) %>% substr(3, 3), sampleNames(proteinGroups.CPTAC) %>% substr(5, 5))

### Impute with KNN-1
proteinGroups.CPTAC.KNN1 <- impute(proteinGroups.CPTAC, method = "knn")
## Warning in knnimp(x, k, maxmiss = rowmax, maxp = maxp): 700 rows with more than 50 % entries missing;
##  mean imputation used for these rows
cex.main <- 1.2
cex.lab <- 1
cex <- 1
par(mfrow=c(1,1))

# 1. Histograms for the effect of Perseus imputation LFQ-level (Supplementary Fig. 1)

# grDevices::svg(paste0(wd,"/Perseus_protein_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))

for(i in 1:ncol(exprs(proteinGroups.CPTAC.Perseus.imp))){
p1 <- hist(exprs(proteinGroups.CPTAC.Perseus.imp[,i]), breaks=25, plot = FALSE)
p2 <- hist(exprs(proteinGroups.CPTAC[,i]), breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(12,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("Perseus imputation sample ", sample.names[i]), xlab = "log2(LFQ intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex)  # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(12,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE)  # second histogram
}

# dev.off()

# 2. Histograms for the effect of kNN imputation at LFQ-level (Supplementary Fig. 2)

# grDevices::svg(paste0(wd,"/kNN_protein_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))

for(i in 1:ncol(exprs(proteinGroups.CPTAC.KNN1))){
p1 <- hist(exprs(proteinGroups.CPTAC.KNN1)[,i], breaks=25, plot = FALSE)
p2 <- hist(exprs(proteinGroups.CPTAC)[,i], breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(13,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("kNN imputation sample ", sample.names[i]), xlab = "log2(LFQ intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex)  # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(13,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE)  # second histogram
}

# dev.off()

2. Evaluate the imputation: peptide level (Supplementary Fig. 3 and 4)

We here show the sample-wise effect of kNN and Perseus imputation at the peptide level. Blue are the original values, red are the imputed values.

# 3. Histograms for the effect of Perseus imputation at peptide-level (Supplementary Fig. 3)

# grDevices::svg(paste0(wd,"/Perseus_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))

for(i in 1:ncol(exprs(peptides.CPTAC.Perseus.imp))){
p1 <- hist(exprs(peptides.CPTAC.Perseus.imp[,i]), breaks=25, plot = FALSE)
p2 <- hist(exprs(peptides.CPTAC2[,i]), breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(12,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("Perseus imputation sample ", sample.names[i]), xlab = "log2(peptide intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex)  # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(12,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE)  # second histogram
}

# dev.off()

# 4. Histograms for the effect of kNN imputation at peptide-level (Supplementary Fig. 4)

sample.names <- paste0(sampleNames(peptides.CPTAC.KNN1) %>% substr(3, 3), sampleNames(peptides.CPTAC.KNN1) %>% substr(5, 5))

cex.main <- 1.2
cex.lab <- 1
cex <- 1
par(mfrow=c(1,1))

# grDevices::svg(paste0(wd,"/kNN_sample_imp.svg"), width=60, height=90)
# cex.main <- 10
# cex.lab <- 8
# cex <- 6
# par(mfrow=c(1,3))
# par(mar=c(5.1*cex/2,4.1*cex/2,4.1*cex/2,2.1*cex/2))
# par(mfrow=c(9,3))

for(i in 1:ncol(exprs(peptides.CPTAC.KNN1))){
p1 <- hist(exprs(peptides.CPTAC.KNN1)[,i], breaks=25, plot = FALSE)
p2 <- hist(exprs(peptides.CPTAC2)[,i], breaks=25, plot = FALSE)
plot(p1, col=rgb(255,0,0, maxColorValue = 255), xlim=c(13,28), border=rgb(255,165,0, maxColorValue = 255), las = 1, main = paste0("kNN imputation sample ", sample.names[i]), xlab = "log2(peptide intensity)", cex.lab=cex.lab, cex.axis=cex, cex.main=cex.main, cex.sub=cex, lwd=cex)  # first histogram
plot(p2, col=rgb(0,0,255, maxColorValue = 255), xlim=c(13,28), border=rgb(0,165,255, maxColorValue = 255), add=TRUE)  # second histogram
}

# dev.off()

3. Make the FDR-FTP plots (Supplementary Fig. 5)

# FDR-FTP plot including MSstats, MSstats with Inf and kNN, based on our preprocessing

res.list <- list(res.CPTAC.full,
                 res.CPTAC.co.full,
                 res.CPTAC.Hurdle,
                 res.CPTAC.KNN1.full,
                 res.CPTAC.PI.full,
                 CPTAC.Perseus.full,
                 CPTAC.Perseus.imp.full,
                 res.CPTAC.MSstats.full,
                 res.CPTAC.MSstats.full[!is.infinite(res.CPTAC.MSstats.full$log2FC),],
                 res.CPTAC.ProPCA.full
)

contrast.vec <- c("condition6B-condition6A",
                  "condition6C-condition6B",
                  "condition6C-condition6A")

names(contrast.vec) <- c("Condition 6B vs. 6A",
                  "Condition 6C vs. 6B",
                  "Condition 6C vs. 6A")

colors <- c("#FF681E", "#E15E9E", "#5A2A82", "blue", "green", "#1B2944", "#42B7BA", "yellow3", "peru", "red2") # darkblue: "#1B2944"  gray: "#50FF00"

sort.list <- list(c("qvalue", "pvalue", "t", "logFC"),
                  c("qvalue", "pvalue", "t", "logOR"),
                  c("qchisq", "pchisq", "chisq", NA),
                  c("qvalue", "pvalue", "t", "logFC"),
                  c("qvalue", "pvalue", "t", "logFC"),
                  c("q.value", "p.value", "Test.statistic", "Difference"),
                  c("q.value", "p.value", "Test.statistic", "Difference"),
                  c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
                  c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
                  c("adj.P.Val", "P.Value", "t", "logFC"))

PlotFDPTPR(res.list, contrast.vec, colors, sort.list, TPcol = c("UPS"), plotSVG = FALSE) #TRUE

4. Make the FDR-FTP plots using MSstats preprocessing as a basis (Supplementary Fig. 6)

# Since in our preprocessing, 2 significant proteins were removed from the MSstats output, we also redo the whole thing with MSstats preprocessing as a reference.

res.CPTAC.MSstats <- as.tibble(testResultOneComparison$ComparisonResult)

res.CPTAC.MSstats <- res.CPTAC.MSstats %>% dplyr::rename(protein = Protein, contrast = Label)

res.CPTAC.MSstats$contrast <- res.CPTAC.MSstats$contrast %>% as.character

res.CPTAC.MSstats$contrast[res.CPTAC.MSstats$contrast == "6B-6A"] <- "condition6B-condition6A"
res.CPTAC.MSstats$contrast[res.CPTAC.MSstats$contrast == "6C-6A"] <- "condition6C-condition6A"
res.CPTAC.MSstats$contrast[res.CPTAC.MSstats$contrast == "6C-6B"] <- "condition6C-condition6B"



res.CPTAC.Base2 <- res.CPTAC.MSstats %>% select(protein, contrast)
res.CPTAC.Base2$UPS <- grepl("ups", res.CPTAC.Base2$protein)

res.CPTAC.MSstats.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.MSstats)
## Joining, by = c("protein", "contrast")
res.CPTAC.MSstats.full2 <- res.CPTAC.MSstats.full2 %>% arrange(adj.pvalue,pvalue)

res.CPTAC.MSstats.noInf.full2 <- res.CPTAC.MSstats.full2

res.CPTAC.MSstats.noInf.full2[is.infinite(res.CPTAC.MSstats.noInf.full2$log2FC),][,c("log2FC", "SE", "Tvalue", "DF", "pvalue", "adj.pvalue")] <- NA
res.CPTAC.MSstats.noInf.full2 <- res.CPTAC.MSstats.noInf.full2 %>% arrange(adj.pvalue,pvalue)

# Perseus without imputation #

# 1. Import Perseus results

results.Perseus <- read.table(paste0(wd,"/datasets/CPTAC/t-tests_Perseus_1_6_0_7-6A-6B-6C.txt"), sep="\t", header=TRUE, quote = "", comment.char = "")

results.Perseus <- results.Perseus[-c(1,2),] %>% map_dfr(~{.x %>% as.character %>% type.convert})

# 2. Convert wide to long

results.Perseus <- results.Perseus[,43:58]
colnames(results.Perseus) <- gsub("Student.s.T.test.","",colnames(results.Perseus))

res.Perseus <- results.Perseus %>% reshape(timevar = "contrast", varying =,1:12, direction = "long", sep = ".6")
## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.
res.Perseus$contrast[res.Perseus$contrast == "B_6A"] <- "condition6B-condition6A"
res.Perseus$contrast[res.Perseus$contrast == "C_6A"] <- "condition6C-condition6A"
res.Perseus$contrast[res.Perseus$contrast == "C_6B"] <- "condition6C-condition6B"

res.Perseus <- res.Perseus %>% dplyr::rename(protein = Majority.protein.IDs, protein.name = Protein.names, gene.name = Gene.names) %>% select(-Protein.IDs)

UPS.Perseus <- unique(res.Perseus$protein)[grepl("ups", unique(res.Perseus$protein))]
UPS.MSstats <- unique(res.CPTAC.MSstats$protein)[grepl("ups", unique(res.CPTAC.MSstats$protein))]
UPS.Hurdle <- unique(res.CPTAC.Hurdle$protein)[grepl("ups", unique(res.CPTAC.Hurdle$protein))]

UPS.MSstats[!(UPS.MSstats %in% UPS.Perseus)]
## [1] P01008ups;CON__P41361             P68871ups;CON__P02070;CON__Q3SX09
## [3] P99999ups;CON__P62894            
## 1446 Levels: A5Z2X5 D6VTK4 O00762ups O13516 O13547 O13563 O14455 ... Q99385
# P01008ups;CON__P41361 P68871ups;CON__P02070;CON__Q3SX09 P99999ups;CON__P62894
UPS.Perseus[!(UPS.Perseus %in% UPS.MSstats)]
## [1] P01008ups P68871ups P99999ups
## 1462 Levels: A5Z2X5 D6VTK4 O00762ups O13516 O13547 O13563 O14455 ... Q99385
# P01008ups P68871ups P99999ups

# -> all three of them link perfectly with each other, thus we will fill them in

CPTAC.Perseus.full2 <- res.CPTAC.Base2 %>% left_join(res.Perseus)
## Joining, by = c("protein", "contrast")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
CPTAC.Perseus.full2[CPTAC.Perseus.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus[res.Perseus$protein == "P01008ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]

CPTAC.Perseus.full2[CPTAC.Perseus.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus[res.Perseus$protein == "P68871ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]

CPTAC.Perseus.full2[CPTAC.Perseus.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus[res.Perseus$protein == "P99999ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]

CPTAC.Perseus.full2 <- CPTAC.Perseus.full2 %>% arrange(p.value)

# Perseus with imputation #

results.Perseus.imp <- read.table(paste0(wd,"/datasets/CPTAC/t-tests_Perseus_imputed_1_6_0_7-6A-6B-6C.txt"), sep="\t", header=TRUE, quote = "", comment.char = "")

results.Perseus.imp <- results.Perseus.imp[-c(1,2),] %>% map_dfr(~{.x %>% as.character %>% type.convert})

# 2. Convert wide to long

results.Perseus.imp <- results.Perseus.imp[,43:58]
colnames(results.Perseus.imp) <- gsub("Student.s.T.test.","",colnames(results.Perseus.imp))

res.Perseus.imp <- results.Perseus.imp %>% reshape(timevar = "contrast", varying =,1:12, direction = "long", sep = ".6")
## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.

## Warning: Setting row names on a tibble is deprecated.
res.Perseus.imp$contrast[res.Perseus.imp$contrast == "B_6A"] <- "condition6B-condition6A"
res.Perseus.imp$contrast[res.Perseus.imp$contrast == "C_6A"] <- "condition6C-condition6A"
res.Perseus.imp$contrast[res.Perseus.imp$contrast == "C_6B"] <- "condition6C-condition6B"

res.Perseus.imp <- res.Perseus.imp %>% dplyr::rename(protein = Majority.protein.IDs, protein.name = Protein.names, gene.name = Gene.names) %>% select(-Protein.IDs)

CPTAC.Perseus.imp.full2 <- res.CPTAC.Base2 %>% left_join(res.Perseus.imp)
## Joining, by = c("protein", "contrast")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
CPTAC.Perseus.imp.full2[CPTAC.Perseus.imp.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus.imp[res.Perseus.imp$protein == "P01008ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]

CPTAC.Perseus.imp.full2[CPTAC.Perseus.imp.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus.imp[res.Perseus.imp$protein == "P68871ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]

CPTAC.Perseus.imp.full2[CPTAC.Perseus.imp.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")] <- res.Perseus.imp[res.Perseus.imp$protein == "P99999ups",][,c("contrast", "p.value", "q.value", "Difference", "Test.statistic")]

CPTAC.Perseus.imp.full2 <- CPTAC.Perseus.imp.full2 %>% arrange(p.value)

# Hurdle model #

UPS.MSstats[!(UPS.MSstats %in% UPS.Hurdle)]
## [1] P01008ups;CON__P41361             P01112ups                        
## [3] P09211ups                         P62988ups                        
## [5] P68871ups;CON__P02070;CON__Q3SX09 P99999ups;CON__P62894            
## 1446 Levels: A5Z2X5 D6VTK4 O00762ups O13516 O13547 O13563 O14455 ... Q99385
# P01008ups;CON__P41361 P01112ups P09211ups P62988ups P68871ups;CON__P02070;CON__Q3SX09 P99999ups;CON__P62894
UPS.Hurdle[!(UPS.Hurdle %in% UPS.MSstats)]
## [1] P99999ups               P01008ups               P68871ups              
## [4] P05759;P0CG63;P62988ups
## 1655 Levels:  A5Z2X5 ... Q99385
# P99999ups               P01008ups               P68871ups   P05759;P0CG63;P62988ups

# -> everything except P01112ups and P09211ups link up
# => insert the estimates of the 4 others

res.CPTAC.Hurdle2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.Hurdle)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]

res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]

res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]

res.CPTAC.Hurdle2[res.CPTAC.Hurdle2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")] <- res.CPTAC.Hurdle[res.CPTAC.Hurdle$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "logOR", "chisq", "pchisq", "qchisq")]

res.CPTAC.Hurdle2 <- res.CPTAC.Hurdle2 %>% arrange(pchisq)

# MSqRob without preprocessing #

res.CPTAC.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.full2[res.CPTAC.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.full2[res.CPTAC.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.full2[res.CPTAC.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.full2[res.CPTAC.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.full[res.CPTAC.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.full2 <- res.CPTAC.full2 %>% arrange(pvalue)

# Quasibinomial model #

res.CPTAC.co.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.co.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factors with different levels, coercing
## to character vector
res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.co.full2[res.CPTAC.co.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.co.full[res.CPTAC.co.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logOR", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.co.full2 <- res.CPTAC.co.full2 %>% arrange(pvalue)

# MSqRob with kNN imputation #

res.CPTAC.KNN1.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.KNN1.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.KNN1.full2[res.CPTAC.KNN1.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.KNN1.full[res.CPTAC.KNN1.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.KNN1.full2 <- res.CPTAC.KNN1.full2 %>% arrange(pvalue)

# MSqRob with Perseus imputation #

res.CPTAC.PI.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.PI.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.PI.full2[res.CPTAC.PI.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")] <- res.CPTAC.PI.full[res.CPTAC.PI.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "se", "t", "df", "pvalue", "qvalue")]

res.CPTAC.PI.full2 <- res.CPTAC.PI.full2 %>% arrange(pvalue)

# ProPCA with limma #

res.CPTAC.ProPCA.full
## # A tibble: 4,143 x 11
##    protein   UPS   gene.name protein.name contrast     logFC AveExpr     t
##    <chr>     <lgl> <fct>     <fct>        <chr>        <dbl>   <dbl> <dbl>
##  1 Q06830ups TRUE  ""        ""           condition6C… 0.552   1.91  32.9 
##  2 Q06830ups TRUE  ""        ""           condition6C… 0.313   1.91  18.7 
##  3 Q06830ups TRUE  ""        ""           condition6B… 0.238   1.91  14.2 
##  4 P12081ups TRUE  ""        ""           condition6C… 0.822   1.52   9.85
##  5 P10636-8… TRUE  ""        ""           condition6C… 1.00    2.12   9.45
##  6 P00915ups TRUE  ""        ""           condition6C… 0.336   0.633  7.45
##  7 P12081ups TRUE  ""        ""           condition6B… 0.611   1.52   7.32
##  8 P02144ups TRUE  ""        ""           condition6C… 0.159   0.877  7.00
##  9 P10145ups TRUE  ""        ""           condition6C… 0.145   0.876  6.89
## 10 P55957ups TRUE  ""        ""           condition6C… 0.593   0.688  6.65
## # ... with 4,133 more rows, and 3 more variables: P.Value <dbl>,
## #   adj.P.Val <dbl>, B <dbl>
res.CPTAC.ProPCA.full2 <- res.CPTAC.Base2 %>% left_join(res.CPTAC.ProPCA.full)
## Joining, by = c("protein", "contrast", "UPS")
## Warning: Column `protein` joining factor and character vector, coercing
## into character vector
res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P99999ups;CON__P62894",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P99999ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]

res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P01008ups;CON__P41361",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P01008ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]

res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P68871ups;CON__P02070;CON__Q3SX09",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P68871ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]

res.CPTAC.ProPCA.full2[res.CPTAC.ProPCA.full2$protein == "P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")] <- res.CPTAC.ProPCA.full[res.CPTAC.ProPCA.full$protein == "P05759;P0CG63;P62988ups",][,c("contrast", "gene.name", "protein.name", "logFC", "AveExpr", "P.Value", "adj.P.Val", "B")]

res.CPTAC.ProPCA.full2 <- res.CPTAC.ProPCA.full2 %>% arrange(P.Value)


### FDR-FTP plots with MSstats preprocessing as a basis (Supplementary Fig. 6) ###

res.list <- list(res.CPTAC.full2,
                 res.CPTAC.co.full2,
                 res.CPTAC.Hurdle2,
                 res.CPTAC.KNN1.full2,
                 res.CPTAC.PI.full2,
                 CPTAC.Perseus.full2,
                 CPTAC.Perseus.imp.full2,
                 res.CPTAC.MSstats.full2,
                 res.CPTAC.MSstats.full2[!is.infinite(res.CPTAC.MSstats.full2$log2FC),],
                 res.CPTAC.ProPCA.full2
)

contrast.vec <- c("condition6B-condition6A",
                  "condition6C-condition6B",
                  "condition6C-condition6A")

names(contrast.vec) <- c("Condition 6B vs. 6A",
                  "Condition 6C vs. 6B",
                  "Condition 6C vs. 6A")

colors <- c("#FF681E", "#E15E9E", "#5A2A82", "blue", "green", "#1B2944", "#42B7BA", "yellow3", "peru", "red2") # darkblue: "#1B2944"  gray: "#50FF00"

sort.list <- list(c("qvalue", "pvalue", "t", "logFC"),
                  c("qvalue", "pvalue", "t", "logOR"),
                  c("qchisq", "pchisq", "chisq", NA),
                  c("qvalue", "pvalue", "t", "logFC"),
                  c("qvalue", "pvalue", "t", "logFC"),
                  c("q.value", "p.value", "Test.statistic", "Difference"),
                  c("q.value", "p.value", "Test.statistic", "Difference"),
                  c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
                  c("adj.pvalue", "pvalue", "Tvalue", "log2FC"),
                  c("adj.P.Val", "P.Value", "t", "logFC"))

PlotFDPTPR(res.list, contrast.vec, colors, sort.list, TPcol = c("UPS"), plotSVG = FALSE) #TRUE

5. Correlation between intensities and counts (Supplementary Fig. 7)

# Make the plots

# BA, CB, CA
upratio <- c(1.565597, 1.571906, 3.137504)

# 1. Top: condition 6B vs. 6A

TP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6B-condition6A" & UPS)

FP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6B-condition6A" & !UPS)

scatterHist(x = ((FP.CvsB$logOR) %>% unlist),
            y = ((FP.CvsB$logFC) %>% unlist),
            x2 = ((TP.CvsB$logOR) %>% unlist),
            y2 = ((TP.CvsB$logFC) %>% unlist), 
            main = "Condition B vs. A", plotSVG = FALSE, filename = paste0(wd,"/scatterhistBA.svg")) #, plotSVG = TRUE, filename = paste0(wd,"/scatterhist.svg")

# 2. Middle: condition 6C vs. 6A

TP.CvsA <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6A" & UPS)

FP.CvsA <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6A" & !UPS)

scatterHist(x = ((FP.CvsA$logOR) %>% unlist),
            y = ((FP.CvsA$logFC) %>% unlist),
            x2 = ((TP.CvsA$logOR) %>% unlist),
            y2 = ((TP.CvsA$logFC) %>% unlist), 
            main = "Condition C vs. A", plotSVG = FALSE, filename = paste0(wd,"/scatterhistCA.svg")) #, plotSVG = TRUE, filename = paste0(wd,"/scatterhist.svg")

# 3. Bottom: condition 6C vs. 6B

TP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6B" & UPS)

FP.CvsB <- res.CPTAC.Hurdle %>% filter(contrast == "condition6C-condition6B" & !UPS)

scatterHist(x = ((FP.CvsB$logOR) %>% unlist),
            y = ((FP.CvsB$logFC) %>% unlist),
            x2 = ((TP.CvsB$logOR) %>% unlist),
            y2 = ((TP.CvsB$logFC) %>% unlist), 
            main = "Condition C vs. B", plotSVG = FALSE, filename = paste0(wd,"/scatterhistCB.svg")) #, plotSVG = TRUE, filename = paste0(wd,"/scatterhist.svg")

6. Plot example proteins for each of the overlapping regions in the Venn diagram (Supplementary Fig. 8 - 12)

Aditionally, we here provide the plots for the underlying (preprocessed) peptide-level data for the 1500 most significant proteins for each method in the HEART dataset, grouped per method.

### Overlap all (Supplementary Fig. 8)

plot_proteins(gene.names = "MYL7", title = "MYL7", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "NPPA", title = "NPPA", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "PAM", title = "PAM", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "MYBPHL", title = "MYBPHL", msnset = peptides.HEART2, pdf = FALSE)

# plot_proteinsSVG(gene.names = c("MYL7", "NPPA", "PAM", "MYBPHL"), title = c("overlap_all.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.MSqRob.AvsV.1500[(genes.MSqRob.AvsV.1500 %in% genes.Perseus.AvsV.1500) & (genes.MSqRob.AvsV.1500 %in% genes.Hurdle.AvsV.1500)], title = "overlap_all.pdf", msnset = peptides.HEART2, pdf = TRUE)

### Overlap Hurdle and Perseus (Supplementary Fig. 9)

plot_proteins(gene.names = "CHGB", title = "CHGB", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "PRKCD", title = "PRKCD", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "PRR33", title = "PRR33", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "NTN1", title = "NTN1", msnset = peptides.HEART2, pdf = FALSE)

# plot_proteinsSVG(gene.names = c("CHGB", "PRKCD", "PRR33", "NTN1"), title = c("overlap_hurdle_Perseus.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.Hurdle.AvsV.1500[(genes.Hurdle.AvsV.1500 %in% genes.Perseus.AvsV.1500) & !(genes.Hurdle.AvsV.1500 %in% genes.MSqRob.AvsV.1500)], title = "overlap_hurdle_Perseus.pdf", msnset = peptides.HEART2, pdf = TRUE)

### Hurdle alone (Supplementary Fig. 10)

plot_proteins(gene.names = "PDS5A", title = "PDS5A", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "CNOT1", title = "CNOT1", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "PLVAP", title = "PLVAP", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "RELA", title = "RELA", msnset = peptides.HEART2, pdf = FALSE)

# plot_proteinsSVG(gene.names = c("PDS5A", "CNOT1", "PLVAP", "RELA"), title = c("only_hurdle.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.Hurdle.AvsV.1500[!(genes.Hurdle.AvsV.1500 %in% genes.MSqRob.AvsV.1500) & !(genes.Hurdle.AvsV.1500 %in% genes.Perseus.AvsV.1500)], title = "only_hurdle.pdf", msnset = peptides.HEART2, pdf = TRUE)

### Perseus alone (Supplementary Fig. 11)

plot_proteins(gene.names = "ASNSD1", title = "ASNSD1", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "LMO7", title = "LMO7", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "MTCL1", title = "MTCL1", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "BLOC1S5;BLOC1S5-TXNDC5", title = "BLOC1S5;BLOC1S5-TXNDC5", msnset = peptides.HEART2, pdf = FALSE)

# plot_proteinsSVG(gene.names = c("ASNSD1", "LMO7", "MTCL1", "BLOC1S5;BLOC1S5-TXNDC5"), title = c("only_Perseus.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.Perseus.AvsV.1500[!(genes.Perseus.AvsV.1500 %in% genes.MSqRob.AvsV.1500) & !(genes.Perseus.AvsV.1500 %in% genes.Hurdle.AvsV.1500)], title = "only_Perseus.pdf", msnset = peptides.HEART2, pdf = TRUE)

### Overlap MSqRob and Perseus (Supplementary Fig. 12)

plot_proteins(gene.names = "ACO1", title = "ACO1", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "HNRNPK", title = "HNRNPK", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "SLC25A20", title = "SLC25A20", msnset = peptides.HEART2, pdf = FALSE)

plot_proteins(gene.names = "TPM4", title = "TPM4", msnset = peptides.HEART2, pdf = FALSE)

# plot_proteinsSVG(gene.names = c("ACO1", "HNRNPK", "SLC25A20", "TPM4"), title = c("overlap_MSqRob_Perseus.svg"), msnset = peptides.HEART2)
# plot_proteins(gene.names = genes.MSqRob.AvsV.1500[(genes.MSqRob.AvsV.1500 %in% genes.Perseus.AvsV.1500) & !(genes.MSqRob.AvsV.1500 %in% genes.Hurdle.AvsV.1500)], title = "overlap_MSqRob_Perseus.pdf", msnset = peptides.HEART2, pdf = TRUE)

7. Generate the data for the other Venn diagrams for the HEART dataset (Supplementary Fig. 13)

### Venn diagrams atrial vs. ventricular region ###

### Checks: ###

all(Perseus.AvsV.ov$`Gene names` %in% hurdle.AvsV.ov$gene.name)
## [1] TRUE
all(Perseus.AvsV.ov$`Gene names` %in% MSqRob.AvsV.ov$gene.name)
## [1] TRUE
all(hurdle.AvsV.ov$gene.name %in% Perseus.AvsV.ov$`Gene names`)
## [1] TRUE
all(MSqRob.AvsV.ov$gene.name %in% Perseus.AvsV.ov$`Gene names`)
## [1] TRUE
dim(MSqRob.AvsV.ov)
## [1] 7822    9
dim(hurdle.AvsV.ov)
## [1] 7822    8
length(unique(Perseus.AvsV.ov$`Gene names`))
## [1] 7822
################

### Top: the first 500 significant genes ###

# A. Select top 500 DA gene identifiers for the Perseus analysis of ventricles vs. atria
genes.Perseus.AvsV.500 <- (Perseus.AvsV.ov %>% pull(`Gene names`) %>% unique)[1:500]

# B. Select top 500 DA gene identifiers for the Hurdle analysis of atria vs. ventricles
genes.Hurdle.AvsV.500 <- hurdle.AvsV.ov[1:500,]$gene.name

# C. Select top 500 DA gene identifiers for the MSqRob analysis of atria vs. ventricles
genes.MSqRob.AvsV.500 <- MSqRob.AvsV.ov[1:500,]$gene.name

# MSqRob alone

sum(!(genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500) & !(genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500))
## [1] 110
# 110

# Hurdle alone

sum(!(genes.Hurdle.AvsV.500 %in% genes.MSqRob.AvsV.500) & !(genes.Hurdle.AvsV.500 %in% genes.Perseus.AvsV.500))
## [1] 99
# 99

# Perseus alone

sum(!(genes.Perseus.AvsV.500 %in% genes.MSqRob.AvsV.500) & !(genes.Perseus.AvsV.500 %in% genes.Hurdle.AvsV.500))
## [1] 214
# 214

# Overlap MSqRob and Hurdle

sum((genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500) & !(genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500))
## [1] 144
# 144

# Overlap MSqRob and Perseus

sum((genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500) & !(genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500))
## [1] 29
# 43

# Overlap Hurdle and Perseus

sum((genes.Hurdle.AvsV.500 %in% genes.Perseus.AvsV.500) & !(genes.Hurdle.AvsV.500 %in% genes.MSqRob.AvsV.500))
## [1] 40
# 40

# Overlap everything

sum((genes.MSqRob.AvsV.500 %in% genes.Perseus.AvsV.500) & (genes.MSqRob.AvsV.500 %in% genes.Hurdle.AvsV.500))
## [1] 217
# 217

# Control:
length(genes.MSqRob.AvsV.500)
## [1] 500
110+217+144+29
## [1] 500
length(genes.Hurdle.AvsV.500)
## [1] 500
99+144+217+40
## [1] 500
length(genes.Perseus.AvsV.500)
## [1] 500
214+40+29+217
## [1] 500
### Bottom: the first 1000 significant genes ###

# A. Select top 1000 DA gene identifiers for the Perseus analysis of ventricles vs. atria
genes.Perseus.AvsV.1000 <- (Perseus.AvsV.ov %>% pull(`Gene names`) %>% unique)[1:1000]

# B. Select top 1000 DA gene identifiers for the Hurdle analysis of atria vs. ventricles
genes.Hurdle.AvsV.1000 <- hurdle.AvsV.ov[1:1000,]$gene.name

# C. Select top 1000 DA gene identifiers for the MSqRob analysis of atria vs. ventricles
genes.MSqRob.AvsV.1000 <- MSqRob.AvsV.ov[1:1000,]$gene.name

# MSqRob alone

sum(!(genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000) & !(genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000))
## [1] 210
# 210

# Hurdle alone

sum(!(genes.Hurdle.AvsV.1000 %in% genes.MSqRob.AvsV.1000) & !(genes.Hurdle.AvsV.1000 %in% genes.Perseus.AvsV.1000))
## [1] 158
# 158

# Perseus alone

sum(!(genes.Perseus.AvsV.1000 %in% genes.MSqRob.AvsV.1000) & !(genes.Perseus.AvsV.1000 %in% genes.Hurdle.AvsV.1000))
## [1] 416
# 416

# Overlap MSqRob and Hurdle

sum((genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000) & !(genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000))
## [1] 325
# 325

# Overlap MSqRob and Perseus

sum((genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000) & !(genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000))
## [1] 67
# 67

# Overlap Hurdle and Perseus

sum((genes.Hurdle.AvsV.1000 %in% genes.Perseus.AvsV.1000) & !(genes.Hurdle.AvsV.1000 %in% genes.MSqRob.AvsV.1000))
## [1] 119
# 119

# Overlap everything

sum((genes.MSqRob.AvsV.1000 %in% genes.Perseus.AvsV.1000) & (genes.MSqRob.AvsV.1000 %in% genes.Hurdle.AvsV.1000))
## [1] 398
# 398

# Control:
length(genes.MSqRob.AvsV.1000)
## [1] 1000
210+325+398+67
## [1] 1000
length(genes.Hurdle.AvsV.1000)
## [1] 1000
158+325+398+119
## [1] 1000
length(genes.Perseus.AvsV.1000)
## [1] 1000
416+67+119+398
## [1] 1000